# Controlled Human Malaria Infection (CHMI): multi-level analysis of gametocyte concentrations (Pfs25)

# Author: Luc Coffeng
# Date created: 11 December 2017


# Prep session
  # The workspace is deliberately NOT wiped with rm(list = ls()) here: that
  # call deletes the caller's objects when this script is source()d, yet it
  # leaves attached packages and options() untouched, so it does not provide
  # a clean session anyway. Run this script in a fresh R session instead.
  
  library(data.table)
  library(rstan)
  library(ggplot2)
  
  # Let rstan save compiled models to disk (avoids recompiling unchanged
  # models) and use all detected cores when running chains.
  rstan_options(auto_write = TRUE)
  options(mc.cores = parallel::detectCores())
  
  # Machine-specific absolute paths: adjust these before running elsewhere.
  # NOTE(review): base.dir starts with "/Users/Luc" while data.file starts
  # with "/Users/luc"; this only resolves to the same home directory on a
  # case-insensitive file system -- confirm which spelling is intended.
  base.dir <- "/Users/Luc/Documents/Research/Radboud UMC/Gametocytes/01_Analysis"
  code.dir <- file.path(base.dir,"01_Code")
  out.dir <- file.path(base.dir,"02_Output")
  data.file <- "/Users/luc/Documents/Research/Data/Malaria/CHMI_gameto/Processed/2017-06-14 Results PCR - CHMI-trans1 -  coffeng - FINAL FOR DRYAD.csv"
  
  
# Load and prep data
  # Measurements below this value are flagged as censored further down
  detect.limit <- 5
  
  maldata <- fread(data.file)
  
  # Give the Pfs25 measurement column a syntactic, shorter name
  setnames(maldata, "Pf25qRT-PCR_mixed_trendline_mL", "gameto_Pfs25")
  
  # Keep only rows that have a measurement, then flag values under the
  # detection limit and blank them out so they cannot enter any averages
  maldata <- maldata[!is.na(gameto_Pfs25)]
  maldata[, censored := gameto_Pfs25 < detect.limit]
  maldata[which(censored), gameto_Pfs25 := NA]
  
  # Derived columns:
  #   log10_gct_plus1  log10 of (measurement + 1); NA for censored rows
  #   ID               consecutive integer code per volunteer
  maldata[, `:=`(
    log10_gct_plus1 = log10(gameto_Pfs25 + 1),
    ID = as.integer(as.factor(Volunteer))
  )]
  
  # maldata[, mean(log(gameto_Pfs25), na.rm = TRUE), by = group]
  # maldata[, mean(log(gameto_Pfs25), na.rm = TRUE), by = group][, sd(V1)]
  
  # data.plot <- ggplot(data = maldata,
  #        mapping = aes(x = day, y = gameto_Pfs25)) +
  #   geom_point() +
  #   geom_point(data = maldata[(censored)],
  #              mapping = aes(x = day, y = 1),
  #              shape = 1) +
  #   facet_wrap(group ~ Volunteer, ncol = 4, labeller = "label_both") +
  #   scale_x_continuous(name = "\nTime (day)") +
  #   scale_y_log10(name = "Gametocyte concentration (Pfs25)\n",
  #                 breaks = 10^(0:5)) +
  #   expand_limits(y = 1) +
  #   geom_hline(mapping = aes(yintercept = 5), linetype = 2)
  # 
  # data.plot
  # 
  # setwd(out.dir)
  # pdf(file = "data_plot_gameto_Pfs25.pdf")
  # data.plot
  # dev.off()
  
  
### Compile / select data to analyse with Stan
# Data for analysis of proportion of days positive by group
  # Work on a copy so that truncating `day` does not alter `maldata`
  maldata.pos <- copy(maldata)
  maldata.pos[, day := floor(day)]
  
  # One row per individual and (whole) day: positive if at least one
  # measurement on that day is non-missing, i.e. not censored
  maldata.pos <- maldata.pos[, .(day.pos = any(!is.na(gameto_Pfs25))),
                             by = .(ID, day, group)]
  
  # One row per individual: number of positive days and number of days tested
  maldata.pos <- maldata.pos[, .(n.day.pos = sum(day.pos), n.day.tested = .N),
                             by = .(group, ID)]
  setkey(maldata.pos, group, ID)
  
  # Crude pooled proportion of positive days per group (interactive check).
  # This summary used to be computed twice in a row, the second time with a
  # trailing no-op `[,]`; a single call gives the same table.
  maldata.pos[, .(prop.days.po = sum(n.day.pos) / sum(n.day.tested)),
              by = group]
  
  # Data list handed to the logistic Stan model (one entry per individual).
  # NOTE(review): `group` is passed as read from the data file; presumably it
  # is an integer code 1..N_group, as Stan would need -- confirm.
  data.days.pos <- list(
    N_obs = maldata.pos[, .N],
    N_group = maldata.pos[, uniqueN(group)],
    N_ind = maldata.pos[, uniqueN(ID)],
    
    group = maldata.pos[, group],
    ID = maldata.pos[, ID],
    days_tested = maldata.pos[, n.day.tested],
    days_pos = maldata.pos[, n.day.pos]
  )
  
  
# Data for analysis of mean intensities by group
  # Proportion of censored measurements per day, used to pick the time window
  test <- maldata[, mean(censored), by = day]
  setkey(test, day)  # initial conclusion: no information before day 16
  
  # The window actually analysed is day 20 up to (but not including) day 40;
  # it supersedes the earlier day >= 16 cut-off kept below for reference.
  # maldata <- maldata[day >= 16]
  maldata <- maldata[day >= 20 & day < 40]
  
  # Re-derive the individual index on the filtered data so that ID always runs
  # 1..N_ind without gaps, even if a volunteer has no rows left in the window.
  # When no volunteer drops out this reproduces the existing IDs exactly.
  # (Presumably the Stan model indexes individual-level effects by ID and
  # sizes them with N_ind -- confirm against the .stan file.)
  maldata[, ID := as.integer(factor(Volunteer))]
  maldata[, .N, by = ID]
  
  # Crude check: mean log concentration per individual, then averaged by group
  maldata.ind.mu <- maldata[, mean(log(gameto_Pfs25), na.rm = TRUE), by  = .(group,ID)]
  maldata.ind.mu[, mean(V1, na.rm = TRUE), by = group]
  
  # Data list for the censored multi-level Stan models: observed (uncensored)
  # and censored measurements are passed separately, each with its own
  # individual and group index vectors.
  data.selection <- list(
    N_obs = maldata[(!censored), .N],
    N_cens = maldata[(censored), .N],
    N_group = maldata[, length(unique(group))],
    N_ind = maldata[, length(unique(ID))],
    
    dlimit = detect.limit,
    
    gameto_Pfs25 = maldata[(!censored), gameto_Pfs25],
    ID_obs = maldata[(!censored), ID],
    group_obs = maldata[(!censored), group],
    
    ID_cens = maldata[(censored), ID],
    group_cens = maldata[(censored), group]
  )
  
  
# Fit multi-level model with censoring to gametocyte concentrations with
# independent group means
  stan.model.file <- file.path(code.dir, "CHMI_qPCR_gameto.stan")
  fit <- stan(
    file = stan.model.file,
    model_name = "CHMI_multilevel_gameto (eLife reviewer request)",
    data = data.selection,
    iter = 4000,
    # Fixed seed so the fit can be reproduced. The summary recorded in the
    # comments below came from an unseeded run, so the numbers will differ
    # slightly (within Monte Carlo error).
    seed = 20171215,
    # Only these quantities are stored in the returned fit
    pars = c("mu_group",
             "delta_ind_sigma",
             "sigma_obs",
             "mu_group_real",
             "p_group4_highest"),
    # include = TRUE,
    # Small initial step size with adapt_delta and max_treedepth set above
    # rstan's defaults (presumably to avoid divergent transitions)
    control = list(stepsize = 0.0025,
                   adapt_delta = 0.99,
                   max_treedepth = 13)
  )
  
  print(fit, digits = 3)
  
  # Inference for Stan model: CHMI_multilevel_gameto (eLife reviewer request).
  # 4 chains, each with iter=4000; warmup=2000; thin=1; 
  # post-warmup draws per chain=2000, total post-warmup draws=8000.
  # 
  #                      mean se_mean      sd     2.5%      25%      50%      75%    97.5% n_eff  Rhat
  # mu_group[1]         2.420   0.013   0.672    1.051    1.985    2.419    2.850    3.738  2727 1.000
  # mu_group[2]         2.180   0.013   0.669    0.858    1.752    2.189    2.603    3.510  2632 1.001
  # mu_group[3]         3.021   0.015   0.689    1.670    2.589    3.022    3.463    4.384  2217 1.001
  # mu_group[4]         4.913   0.013   0.668    3.606    4.477    4.904    5.341    6.260  2608 1.002
  # delta_ind_sigma     1.285   0.006   0.312    0.831    1.065    1.236    1.447    2.031  2379 1.001
  # sigma_obs           1.083   0.001   0.069    0.958    1.035    1.079    1.127    1.230  6159 1.001
  # mu_group_real[1]   14.183   0.235  12.200    2.861    7.278   11.237   17.293   42.029  2698 1.000
  # mu_group_real[2]   11.089   0.169   8.830    2.358    5.764    8.927   13.510   33.453  2723 1.000
  # mu_group_real[3]   26.166   0.466  24.012    5.310   13.322   20.530   31.914   80.195  2653 1.000
  # mu_group_real[4]  171.941   3.007 151.258   36.835   88.014  134.847  208.741  523.255  2529 1.002
  # p_group4_highest    0.970   0.003   0.171    0.000    1.000    1.000    1.000    1.000  3898 1.002
  # lp__             -137.495   0.159   7.117 -152.698 -141.999 -137.050 -132.485 -124.792  1996 1.001
  # 
  # Samples were drawn using NUTS(diag_e) at Fri Dec 15 16:53:35 2017.
  # For each parameter, n_eff is a crude measure of effective sample size,
  # and Rhat is the potential scale reduction factor on split chains (at 
  # convergence, Rhat=1).
  
  
# Fit multi-level model with censoring to gametocyte concentrations with
# explicit modelling of differences between groups 1-3 and group 4
  stan.model.file <- file.path(code.dir, "CHMI_qPCR_gameto_group4_ref.stan")
  fit.ref.gr4 <- stan(
    file = stan.model.file,
    model_name = "CHMI_multilevel_gameto (eLife reviewer request) group 4 as reference",
    data = data.selection,
    iter = 4000,
    # Fixed seed so the fit can be reproduced. The summary recorded in the
    # comments below came from an unseeded run, so the numbers will differ
    # slightly (within Monte Carlo error).
    seed = 20171215,
    # Only these quantities are stored in the returned fit
    pars = c("mu_group_ref",
             "delta_group",
             "delta_ind_sigma",
             "sigma_obs",
             "p_group4_highest"),
    # include = TRUE,
    # Small initial step size with adapt_delta and max_treedepth set above
    # rstan's defaults (presumably to avoid divergent transitions)
    control = list(stepsize = 0.0025,
                   adapt_delta = 0.99,
                   max_treedepth = 13)
  )
  
  print(fit.ref.gr4, digits = 3)
  
  # Inference for Stan model: CHMI_multilevel_gameto (eLife reviewer request) group 4 as reference.
  # 4 chains, each with iter=4000; warmup=2000; thin=1; 
  # post-warmup draws per chain=2000, total post-warmup draws=8000.
  # 
  #                      mean se_mean    sd     2.5%      25%      50%      75%    97.5% n_eff  Rhat
  # mu_group_ref        4.879   0.016 0.667    3.578    4.457    4.870    5.294    6.207  1839 1.001
  # delta_group[1]     -2.471   0.022 0.981   -4.508   -3.071   -2.451   -1.853   -0.547  1962 1.001
  # delta_group[2]     -2.704   0.021 0.954   -4.573   -3.309   -2.703   -2.092   -0.823  2022 1.002
  # delta_group[3]     -1.865   0.022 0.977   -3.910   -2.482   -1.852   -1.253    0.089  1911 1.001
  # delta_ind_sigma     1.298   0.007 0.313    0.820    1.076    1.246    1.466    2.042  2141 1.002
  # sigma_obs           1.083   0.001 0.070    0.958    1.034    1.078    1.129    1.231  6178 1.000
  # p_group4_highest    0.960   0.003 0.196    0.000    1.000    1.000    1.000    1.000  3756 1.000
  # lp__             -137.295   0.171 7.195 -152.322 -141.968 -136.943 -132.355 -124.011  1774 1.003
  # 
  # Samples were drawn using NUTS(diag_e) at Fri Dec 15 16:52:41 2017.
  # For each parameter, n_eff is a crude measure of effective sample size,
  # and Rhat is the potential scale reduction factor on split chains (at 
  # convergence, Rhat=1).
  
  
# Fit logistic model to proportion of days that individuals are gametocyte+
  stan.model.file <- file.path(code.dir, "CHMI_qPCR_gameto_logistic.stan")
  fit.ref.logit <- stan(
    file = stan.model.file,
    model_name = "CHMI_multilevel_gameto (eLife reviewer request) logistic",
    data = data.days.pos,
    iter = 4000,
    # Fixed seed so the fit can be reproduced. The summary recorded in the
    # comments below came from an unseeded run, so the numbers will differ
    # slightly (within Monte Carlo error).
    seed = 20171215,
    # Only these quantities are stored in the returned fit
    pars = c("mu_group",
             "mu_group_real",
             "delta_ind_sigma",
             "p_group4_highest",
             "p_group4_higher_than_group12",
             "p_group3_higher_than_group12",
             "p_group1_higher_than_group2",
             "p_group4_higher_than_group3",
             "p_group34_highest"),
    # include = TRUE,
    # Small initial step size with adapt_delta and max_treedepth set above
    # rstan's defaults (presumably to avoid divergent transitions)
    control = list(stepsize = 0.0025,
                   adapt_delta = 0.99,
                   max_treedepth = 13)
  )
  
  print(fit.ref.logit, digits = 3)
  
  # Inference for Stan model: CHMI_multilevel_gameto (eLife reviewer request) logistic.
  # 4 chains, each with iter=4000; warmup=2000; thin=1; 
  # post-warmup draws per chain=2000, total post-warmup draws=8000.
  # 
  #                                  mean se_mean    sd     2.5%      25%      50%      75%    97.5% n_eff  Rhat
  # mu_group[1]                    -1.025   0.005 0.353   -1.765   -1.239   -1.012   -0.798   -0.361  4824 1.000
  # mu_group[2]                    -0.597   0.005 0.342   -1.289   -0.811   -0.594   -0.384    0.078  4207 1.001
  # mu_group[3]                     0.053   0.005 0.330   -0.594   -0.155    0.051    0.253    0.704  3941 1.000
  # mu_group[4]                    -0.068   0.005 0.336   -0.732   -0.275   -0.068    0.142    0.613  4124 1.000
  # mu_group_real[1]                0.270   0.001 0.067    0.146    0.225    0.267    0.310    0.411  4892 1.000
  # mu_group_real[2]                0.359   0.001 0.076    0.216    0.308    0.356    0.405    0.520  4222 1.001
  # mu_group_real[3]                0.513   0.001 0.079    0.356    0.461    0.513    0.563    0.669  3972 1.000
  # mu_group_real[4]                0.483   0.001 0.081    0.325    0.432    0.483    0.535    0.649  4150 1.000
  # delta_ind_sigma                 0.513   0.005 0.222    0.123    0.366    0.493    0.635    1.004  1914 1.002
  # p_group4_highest                0.372   0.007 0.483    0.000    0.000    0.000    1.000    1.000  5028 1.001
  # p_group4_higher_than_group12    0.869   0.005 0.338    0.000    1.000    1.000    1.000    1.000  4708 1.000
  # p_group3_higher_than_group12    0.917   0.004 0.275    0.000    1.000    1.000    1.000    1.000  5308 1.000
  # p_group1_higher_than_group2     0.173   0.005 0.378    0.000    0.000    0.000    0.000    1.000  4750 1.000
  # p_group4_higher_than_group3     0.386   0.007 0.487    0.000    0.000    0.000    1.000    1.000  5127 1.000
  # p_group34_highest               0.823   0.006 0.382    0.000    1.000    1.000    1.000    1.000  4698 1.000
  # lp__                         -312.016   0.112 4.455 -321.961 -314.741 -311.612 -308.844 -304.555  1583 1.003
  # 
  # Samples were drawn using NUTS(diag_e) at Fri Dec 15 17:45:04 2017.
  # For each parameter, n_eff is a crude measure of effective sample size,
  # and Rhat is the potential scale reduction factor on split chains (at 
  # convergence, Rhat=1).
  
  
### END OF CODE
  
  
  